package cn.com.mr01.lianxi.mr_lianxi_04;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapper extends
		Mapper<LongWritable, Text, Text, IntWritable> {

	// Reused output objects: n holds the count, k holds the category key
	private final IntWritable n = new IntWritable();
	private final Text k = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Business logic: classify each comma-separated token by its first
		// character and emit a counting <key, value> pair for it
		String[] words = value.toString().split(",");
		// Wrap each token into a <Text, IntWritable> pair
		for (String word : words) {
			if (word.isEmpty()) {
				// Skip empty tokens caused by consecutive or trailing commas
				continue;
			}
			char c = word.charAt(0);
			if (c >= '0' && c <= '9') {
				// Token starts with a digit: count it under "num"
				k.set("num");
				n.set(1);
				context.write(k, n);
			} else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
				// Token starts with an ASCII letter: count it under "word"
				k.set("word");
				n.set(1);
				context.write(k, n);
			} else {
				// Anything else (e.g. Chinese characters): count it under "china"
				k.set("china");
				n.set(1);
				context.write(k, n);
			}
		}
	}
}
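
// A minimal sketch of a companion reducer, assuming the job simply sums the
// per-category counts ("num", "word", "china") emitted by TokenizerMapper.
// The class name IntSumReducer is hypothetical; in a real project it would
// live in its own source file as a public class.
class IntSumReducer extends
		org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable> {

	private final IntWritable result = new IntWritable();

	@Override
	protected void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		// Add up all the 1s emitted for this category
		int sum = 0;
		for (IntWritable value : values) {
			sum += value.get();
		}
		result.set(sum);
		context.write(key, result);
	}
}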

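// A minimal driver sketch, assuming input and output paths are passed on the
// command line and that the hypothetical IntSumReducer above is used. The
// class name TokenCountDriver is an assumption; in a real project this would
// be a public class in its own source file.
class TokenCountDriver {

	public static void main(String[] args) throws Exception {
		org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
		org.apache.hadoop.mapreduce.Job job = org.apache.hadoop.mapreduce.Job.getInstance(conf, "token count");
		job.setJarByClass(TokenCountDriver.class);
		job.setMapperClass(TokenizerMapper.class);
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// args[0] is the input directory, args[1] the (non-existing) output directory
		org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(
				job, new org.apache.hadoop.fs.Path(args[0]));
		org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
				job, new org.apache.hadoop.fs.Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}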